Large analysis workflows are fragile ecosystems of software tools, scripts and dependencies.
This complexity commonly makes these workflows not only irreproducible but sometimes not even re-runnable outside their original development environment.
Even small workflows are affected.
make to Galaxy
Credit: Evan Floden
$PATH, modules, binaries, conda, containers)
Credit: Evan Floden
curl -s https://get.nextflow.io | bash
bin/
#!/usr/bin/env nextflow
// Enable echo so that the standard output of the tasks is shown on the console.
// NOTE(review): set here at script level rather than inside the process - confirm this is intended.
echo true

// Source channel holding the four greetings; the process below runs once per item.
cheers = Channel.from('Bonjour', 'Ciao', 'Hello', 'Hola')

/*
 * Print "<greeting> world!" for each value received from the `cheers` channel.
 */
process sayHello {
    input:
    val x from cheers

    script:
    """
    echo '$x world!'
    """
}
N E X T F L O W ~ version 19.01.0
Pulling nextflow-io/hello ...
downloaded from https://github.com/nextflow-io/hello.git
Launching `nextflow-io/hello` [nasty_wozniak] - revision: a9012339ce [master]
[warm up] executor > local
[f8/52866d] Submitted process > sayHello (1)
[89/8e3d0a] Submitted process > sayHello (2)
[5a/12ca76] Submitted process > sayHello (3)
Bonjour world!
Ciao world!
[1b/e487d8] Submitted process > sayHello (4)
Hello world!
Hola world!
project name: nextflow-io/hello
repository : https://github.com/nextflow-io/hello
local path : /home/rad/.nextflow/assets/nextflow-io/hello
main script : main.nf
revisions :
* master (default)
mybranch
testing
v1.1 [t]
v1.2 [t]
Alternatives
N E X T F L O W ~ version 19.01.0
Pulling nextflow-io/hello ...
downloaded from https://github.com/nextflow-io/hello.git
Launching `nextflow-io/hello` [trusting_euler] - revision: baba3959d7 [v1.1]
[warm up] executor > local
[34/9cacca] Submitted process > sayHello (2)
[71/31d426] Submitted process > sayHello (1)
[e0/c7cdb4] Submitted process > sayHello (3)
Ciao world! (version 1.1)
Bojour world! (version 1.1)
[c2/40df8c] Submitted process > sayHello (4)
Hello world! (version 1.1)
Hola world! (version 1.1)
The logic (and input data) of this example workflow is adapted from the EMBL-ABR Snakemake webinar by Nathan Watson-Haigh.
#!/usr/bin/env nextflow
// URL of the zipped reference chromosome: <base_url><chr>.fsa.zip
referenceLink = "${params.ref.base_url}${params.ref.chr}.fsa.zip".toString()

// Accessions come from params.accessions (see nextflow.config).
// --take N limits processing to the first N accessions, --take all processes every one
// (the value 'all' is translated to -1 before being passed to the take operator).
def takeLimit = (params.take == 'all') ? -1 : params.take
accessionsChannel = Channel
    .from(params.accessions)
    .take(takeLimit)

// Adapters file - params.adapters may point to a local or a remote location
adaptersChannel = Channel.fromPath(params.adapters)
/*
 * Download the zipped reference chromosome pointed to by `referenceLink`.
 * Everything the task creates is emitted on the `references` channel and
 * kept under "${params.outdir}/downloaded" via storeDir.
 */
process download_chromosome {
    tag { params.ref.chr }
    //Prevent re-downloading of large files
    // storeDir { executor == 'awsbatch' ? null : "${params.outdir}/downloaded" } //use with care, caching will not work as normal so changes to input may not take effect
    storeDir { "${params.outdir}/downloaded" } //use with care, caching will not work as normal so changes to input may not take effect
    scratch false //must be false otherwise storeDir ignored

    input:
    //An input declaration needs a qualifier; without `val` the line is a bare
    //expression and the link is not registered as a process input.
    val referenceLink

    output:
    file('*') into references

    script:
    """
    wget ${referenceLink}
    """
}
/*
 * Re-compress the downloaded zip archive with bgzip:
 * the archive content is streamed to stdout (unzip -p) and written to "<input name>.gz".
 */
process bgzip_chromosome {
    cpus '2' //consider defining in conf/requirements.config based on process name or label
    tag { ref }

    input:
    file(ref) from references

    output:
    file '*' into chromosomesChannel

    script:
    """
    unzip -p ${ref} \
    | bgzip --threads ${task.cpus} \
    > ${ref}.gz
    """
}
/*
 * Extract the region params.ref.chr:params.ref.start-params.ref.end from the
 * bgzipped chromosome with samtools faidx and store it, bgzip-compressed,
 * in a file named `subregion`.
 */
process bgzip_chromosome_subregion {
    input:
    file(chr) from chromosomesChannel

    output:
    file 'subregion' into subregionsChannel

    script:
    """
    samtools faidx ${chr} ${params.ref.chr}:${params.ref.start}-${params.ref.end} \
    | bgzip --threads ${task.cpus} \
    > subregion
    """
}
/*
 * For one accession, read the region params.bam.chr:params.bam.start-params.bam.end
 * from the remote BAM file and convert the alignments back to a pair of
 * gzipped FASTQ files (<accession>_R1.fastq.gz, <accession>_R2.fastq.gz).
 * The result is emitted on two channels so that two downstream processes can consume it.
 */
process extract_reads {
    tag { accession }

    input:
    val accession from accessionsChannel
    //e.g. ACBarrie

    output:
    set val(accession), file('*.fastq.gz') into (extractedReadsChannelA, extractedReadsChannelB)
    //e.g. ACBarrie.realigned.bam.bai, ACBarrie_R1.fastq.gz, ACBarrie_R2.fastq.gz

    script:
    //The per-chromosome directory in the URL is taken from params.bam.chr (previously
    //hard-coded as chr4A_part2) so that it always agrees with the region requested below.
    """
    samtools view -hu "${params.bam.base_url}/${params.bam.chr}/${accession}.realigned.bam" \
    ${params.bam.chr}:${params.bam.start}-${params.bam.end} \
    | samtools collate -uO - \
    | samtools fastq -F 0x900 -1 ${accession}_R1.fastq.gz -2 ${accession}_R2.fastq.gz \
    -s /dev/null -0 /dev/null -
    """
}
/*
 * Run FastQC on the untrimmed reads of one accession.
 * All files produced by the task are passed on for aggregation.
 */
process fastqc_raw {
    tag { accession }

    input:
    set val(accession), file('*') from extractedReadsChannelA

    output:
    file '*' into fastqcRawResultsChannel

    script:
    """
    fastqc --quiet --threads ${task.cpus} *
    """
}
/*
 * Aggregate the FastQC results of the raw reads into a MultiQC report.
 * collect() is applied to the input channel so that all FastQC outputs
 * are staged into a single task.
 */
process multiqc_raw {
    input:
    file('*') from fastqcRawResultsChannel.collect()

    output:
    file '*' into multiqcRawResultsChannel

    script:
    """
    multiqc .
    """
}
/*
 * Adapter/quality trimming of paired-end reads with Trimmomatic.
 * The adapters file is combined with every (accession, reads) item, so each
 * task receives the adapters together with the reads of one accession.
 * Only the *.paired.fastq.gz outputs are emitted downstream (on two channels).
 */
process trimmomatic_pe {
    echo true
    tag { accession }

    input:
    set file(adapters), val(accession), file('*') from adaptersChannel.combine(extractedReadsChannelB)

    output:
    set val(accession), file('*.paired.fastq.gz') into (trimmedReadsChannelA, trimmedReadsChannelB)

    script:
    //The *.fastq.gz glob supplies the two staged input read files to Trimmomatic
    """
    trimmomatic PE \
    *.fastq.gz \
    ${accession}_R1.paired.fastq.gz \
    ${accession}_R1.unpaired.fastq.gz \
    ${accession}_R2.paired.fastq.gz \
    ${accession}_R2.unpaired.fastq.gz \
    ILLUMINACLIP:${adapters}:2:30:10:3:true \
    LEADING:2 \
    TRAILING:2 \
    SLIDINGWINDOW:4:15 \
    MINLEN:36
    """
}
/*
 * Run FastQC on the trimmed (paired) reads of one accession.
 */
process fastqc_trimmed {
    tag { accession }

    input:
    set val(accession), file('*') from trimmedReadsChannelB

    output:
    file '*' into fastqcTrimmedResultsChannel

    script:
    """
    fastqc --quiet --threads ${task.cpus} *
    """
}
/*
 * Aggregate the FastQC results of the trimmed reads into a MultiQC report.
 * collect() is applied to the input channel so that all FastQC outputs
 * are staged into a single task.
 */
process multiqc_trimmed {
    input:
    file('*') from fastqcTrimmedResultsChannel.collect()

    output:
    file '*' into multiqcTrimmedResultsChannel

    script:
    """
    multiqc .
    """
}
/*
 * Build a BWA index for the reference subregion.
 * Emits the name of the reference file together with the files created by the task.
 */
process bwa_index {
    input:
    file(ref) from subregionsChannel

    output:
    //also valid: set val("${ref}"), file("*") into indexChannel
    set val(ref.name), file("*") into indexChannel

    script:
    """
    bwa index -a bwtsw ${ref}
    """
}
/*
 * Align the trimmed reads of one accession against the indexed reference
 * with bwa mem and write the alignments to <accession>.bam.
 * The index is combined with every (accession, reads) item, so each task
 * receives the index files together with the reads of one accession.
 */
process bwa_mem {
    tag { accession }

    input:
    set val(ref), file('*'), val(accession), file(reads) from indexChannel.combine(trimmedReadsChannelA)

    output:
    file '*.bam' into alignedReadsChannel

    script:
    //`ref` is the reference file name emitted by bwa_index; it is passed to bwa as the index name
    """
    bwa mem -t ${task.cpus} -R '@RG\\tID:${accession}\\tSM:${accession}' ${ref} ${reads} | samtools view -b > ${accession}.bam
    """
}
Refresh page to see the embedded asciicast or go to https://asciinema.org/a/233197
N E X T F L O W ~ version 19.01.0
Launching `../main.nf` [magical_swirles] - revision: 3d860e45c5
[warm up] executor > local
[skipping] Stored process > download_chromosome (chr4A)
[18/8ff009] Submitted process > extract_reads (ACBarrie)
[40/8f35a9] Submitted process > bgzip_chromosome (iwgsc_refseqv1.0_chr4A.fsa.zip)
[d3/11f8b8] Submitted process > fastqc_raw (ACBarrie)
[00/aac6bd] Submitted process > trimmomatic_pe (ACBarrie)
[1c/82c66e] Submitted process > bgzip_chromosome_subregion
[d4/704d48] Submitted process > fastqc_trimmed (ACBarrie)
[72/bd224a] Submitted process > bwa_index
[11/6a9704] Submitted process > multiqc_raw
[88/1fa33a] Submitted process > bwa_mem (ACBarrie)
[a7/7179c3] Submitted process > multiqc_trimmed
work
├── 00
│ └── aac6bd3585c6c543a1a4b844669445
├── 11
│ └── 6a970455b6ffa7dc6b6d1d8e1271f7
├── 18
│ └── 8ff0099360323919d6a913a7644417
├── 1c
│ └── 82c66efb81a73f83f2d26bbc11a832
├── 29
│ └── 7322cb44afbddad58644703c96af03
├── 34
│ └── 9caccaf9a7f4ead851122aa9eb8df1
├── 3d
│ └── 0965114dcb0ebc9cbe914d0c6dd38e
├── 3e
│ └── d1cb36a34c2411314a3da3fd7e07d8
├── 40
│ └── 8f35a926aa17a41c8e525d6234a4c3
├── 4a
│ ├── b27aaaeb480877acd48df4276b80d8
│ └── ee2b0f98475c6b0d37d5c6939f8864
├── 52
│ └── cf99f6bcba32189dd382a5da1c3e94
├── 6e
│ └── 7e613a8eeb344a4940d435a10889d5
├── 71
│ ├── 31d426f1d0ca3946078de5c955efdf
│ └── 592370d910d8c8deb7aa3ecced77ea
├── 72
│ └── bd224a82323aee42837ad271540977
├── 75
│ └── ad1323892a4856d41a0d519471add5
├── 81
│ └── ef61d4ca716810ca49ed8f3f731756
├── 88
│ └── 1fa33ae04c28cf5eea39f3cf54a637
├── 91
│ └── 325a2284fadfcf491e9268ee8cfbd1
├── a7
│ ├── 7179c34a3f77f2baa721305221699e
│ └── bdd38de6c65dd60612ba4bba1e5749
├── ad
│ └── 9b9bba55b5b4726c06b31f8104022b
├── ae
│ └── c69b493a391e5e83b58a76492cfded
├── b5
│ └── eddaa2f85860553209471faa13f41e
├── bb
│ └── f1df95a87a8305f1e0736f27102ea0
├── c0
│ ├── acf09b0b2c4ec8c646744d849301f7
│ └── e6bb1cf112c973ab17d7d440728155
├── c2
│ └── 40df8ce5e76cf975f05fb875afb2df
├── d1
│ └── 5acc8cc13e1a5a149dc187b4c5b096
├── d2
│ └── a9cc60b86d27cf35d41a64e5e84951
├── d3
│ └── 11f8b865d2c613f15f0fd51bf476c1
├── d4
│ └── 704d48467700dc73430015065888f4
├── e0
│ └── c7cdb4e696b497314f7f9776e95c61
├── e4
│ └── 46448fa82bcf8874b615981a7b87db
├── e7
│ └── eb51fdeebfe3f932afe25b8fb5f4bf
├── e8
│ └── 71d37c1ff8d0a40444574aed5d45e9
└── ee
└── e5af5a4b2cebcc65ddde76661f85cc
72 directories, 0 files
work
├── [4.0K] 00
│ └── [4.0K] aac6bd3585c6c543a1a4b844669445
│ ├── [ 106] ACBarrie_R1.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/18/8ff0099360323919d6a913a7644417/ACBarrie_R1.fastq.gz
│ ├── [154K] ACBarrie_R1.paired.fastq.gz
│ ├── [1.0K] ACBarrie_R1.unpaired.fastq.gz
│ ├── [ 106] ACBarrie_R2.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/18/8ff0099360323919d6a913a7644417/ACBarrie_R2.fastq.gz
│ ├── [156K] ACBarrie_R2.paired.fastq.gz
│ ├── [ 580] ACBarrie_R2.unpaired.fastq.gz
│ ├── [ 0] .command.begin
│ ├── [ 882] .command.err
│ ├── [ 882] .command.log
│ ├── [ 0] .command.out
│ ├── [2.9K] .command.run
│ ├── [ 290] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 173] .command.trace
│ ├── [ 1] .exitcode
│ ├── [4.0K] tmp
│ │ └── [4.0K] 65
│ │ └── [4.0K] eb25bead4e125485f2ad3b89dea6d8
│ │ └── [ 93] TruSeq3-PE.fa
│ └── [ 137] TruSeq3-PE.fa -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/00/aac6bd3585c6c543a1a4b844669445/tmp/65/eb25bead4e125485f2ad3b89dea6d8/TruSeq3-PE.fa
├── [4.0K] 11
│ └── [4.0K] 6a970455b6ffa7dc6b6d1d8e1271f7
│ ├── [ 109] ACBarrie_R1_fastqc.html -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d3/11f8b865d2c613f15f0fd51bf476c1/ACBarrie_R1_fastqc.html
│ ├── [ 108] ACBarrie_R1_fastqc.zip -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d3/11f8b865d2c613f15f0fd51bf476c1/ACBarrie_R1_fastqc.zip
│ ├── [ 109] ACBarrie_R2_fastqc.html -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d3/11f8b865d2c613f15f0fd51bf476c1/ACBarrie_R2_fastqc.html
│ ├── [ 108] ACBarrie_R2_fastqc.zip -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d3/11f8b865d2c613f15f0fd51bf476c1/ACBarrie_R2_fastqc.zip
│ ├── [ 0] .command.begin
│ ├── [ 397] .command.err
│ ├── [ 397] .command.log
│ ├── [ 0] .command.out
│ ├── [3.0K] .command.run
│ ├── [ 26] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 174] .command.trace
│ ├── [ 1] .exitcode
│ ├── [4.0K] multiqc_data
│ │ ├── [126K] multiqc_data.json
│ │ ├── [ 807] multiqc_fastqc.txt
│ │ ├── [ 399] multiqc_general_stats.txt
│ │ ├── [ 12K] multiqc.log
│ │ └── [ 316] multiqc_sources.txt
│ └── [1.1M] multiqc_report.html
├── [4.0K] 18
│ └── [4.0K] 8ff0099360323919d6a913a7644417
│ ├── [200K] ACBarrie_R1.fastq.gz
│ ├── [202K] ACBarrie_R2.fastq.gz
│ ├── [753K] ACBarrie.realigned.bam.bai
│ ├── [ 0] .command.begin
│ ├── [ 87] .command.err
│ ├── [ 87] .command.log
│ ├── [ 0] .command.out
│ ├── [2.4K] .command.run
│ ├── [ 432] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 160] .command.trace
│ └── [ 1] .exitcode
├── [4.0K] 1c
│ └── [4.0K] 82c66efb81a73f83f2d26bbc11a832
│ ├── [ 0] .command.begin
│ ├── [ 0] .command.err
│ ├── [ 0] .command.log
│ ├── [ 0] .command.out
│ ├── [2.6K] .command.run
│ ├── [ 131] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 165] .command.trace
│ ├── [ 1] .exitcode
│ ├── [ 119] iwgsc_refseqv1.0_chr4A.fsa.zip.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/40/8f35a926aa17a41c8e525d6234a4c3/iwgsc_refseqv1.0_chr4A.fsa.zip.gz
│ ├── [ 24] iwgsc_refseqv1.0_chr4A.fsa.zip.gz.fai
│ ├── [181K] iwgsc_refseqv1.0_chr4A.fsa.zip.gz.gzi
│ └── [ 17K] subregion
├── [4.0K] 40
│ └── [4.0K] 8f35a926aa17a41c8e525d6234a4c3
│ ├── [ 0] .command.begin
│ ├── [ 0] .command.err
│ ├── [ 0] .command.log
│ ├── [ 0] .command.out
│ ├── [2.5K] .command.run
│ ├── [ 120] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 188] .command.trace
│ ├── [ 1] .exitcode
│ ├── [ 96] iwgsc_refseqv1.0_chr4A.fsa.zip -> /home/rad/repos/nextflow-embl-abr-webinar/docs/results/downloaded/iwgsc_refseqv1.0_chr4A.fsa.zip
│ └── [216M] iwgsc_refseqv1.0_chr4A.fsa.zip.gz
├── [4.0K] 72
│ └── [4.0K] bd224a82323aee42837ad271540977
│ ├── [ 0] .command.begin
│ ├── [ 480] .command.err
│ ├── [ 480] .command.log
│ ├── [ 0] .command.out
│ ├── [2.5K] .command.run
│ ├── [ 45] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 163] .command.trace
│ ├── [ 1] .exitcode
│ ├── [ 95] subregion -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/1c/82c66efb81a73f83f2d26bbc11a832/subregion
│ ├── [ 33] subregion.amb
│ ├── [ 56] subregion.ann
│ ├── [ 57K] subregion.bwt
│ ├── [ 14K] subregion.pac
│ └── [ 28K] subregion.sa
├── [4.0K] 88
│ └── [4.0K] 1fa33ae04c28cf5eea39f3cf54a637
│ ├── [432K] ACBarrie.bam
│ ├── [ 113] ACBarrie_R1.paired.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/00/aac6bd3585c6c543a1a4b844669445/ACBarrie_R1.paired.fastq.gz
│ ├── [ 113] ACBarrie_R2.paired.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/00/aac6bd3585c6c543a1a4b844669445/ACBarrie_R2.paired.fastq.gz
│ ├── [ 0] .command.begin
│ ├── [1.2K] .command.err
│ ├── [1.2K] .command.log
│ ├── [ 0] .command.out
│ ├── [3.4K] .command.run
│ ├── [ 164] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 154] .command.trace
│ ├── [ 1] .exitcode
│ ├── [ 99] subregion.amb -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/72/bd224a82323aee42837ad271540977/subregion.amb
│ ├── [ 99] subregion.ann -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/72/bd224a82323aee42837ad271540977/subregion.ann
│ ├── [ 99] subregion.bwt -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/72/bd224a82323aee42837ad271540977/subregion.bwt
│ ├── [ 99] subregion.pac -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/72/bd224a82323aee42837ad271540977/subregion.pac
│ └── [ 98] subregion.sa -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/72/bd224a82323aee42837ad271540977/subregion.sa
├── [4.0K] a7
│ └── [4.0K] 7179c34a3f77f2baa721305221699e
│ ├── [ 116] ACBarrie_R1.paired_fastqc.html -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d4/704d48467700dc73430015065888f4/ACBarrie_R1.paired_fastqc.html
│ ├── [ 115] ACBarrie_R1.paired_fastqc.zip -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d4/704d48467700dc73430015065888f4/ACBarrie_R1.paired_fastqc.zip
│ ├── [ 116] ACBarrie_R2.paired_fastqc.html -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d4/704d48467700dc73430015065888f4/ACBarrie_R2.paired_fastqc.html
│ ├── [ 115] ACBarrie_R2.paired_fastqc.zip -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/d4/704d48467700dc73430015065888f4/ACBarrie_R2.paired_fastqc.zip
│ ├── [ 0] .command.begin
│ ├── [ 397] .command.err
│ ├── [ 397] .command.log
│ ├── [ 0] .command.out
│ ├── [3.1K] .command.run
│ ├── [ 26] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 177] .command.trace
│ ├── [ 1] .exitcode
│ ├── [4.0K] multiqc_data
│ │ ├── [119K] multiqc_data.json
│ │ ├── [ 835] multiqc_fastqc.txt
│ │ ├── [ 413] multiqc_general_stats.txt
│ │ ├── [ 12K] multiqc.log
│ │ └── [ 344] multiqc_sources.txt
│ └── [1.1M] multiqc_report.html
├── [4.0K] d3
│ └── [4.0K] 11f8b865d2c613f15f0fd51bf476c1
│ ├── [698K] ACBarrie_R1_fastqc.html
│ ├── [460K] ACBarrie_R1_fastqc.zip
│ ├── [ 106] ACBarrie_R1.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/18/8ff0099360323919d6a913a7644417/ACBarrie_R1.fastq.gz
│ ├── [703K] ACBarrie_R2_fastqc.html
│ ├── [472K] ACBarrie_R2_fastqc.zip
│ ├── [ 106] ACBarrie_R2.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/18/8ff0099360323919d6a913a7644417/ACBarrie_R2.fastq.gz
│ ├── [ 0] .command.begin
│ ├── [ 0] .command.err
│ ├── [ 0] .command.log
│ ├── [ 0] .command.out
│ ├── [2.7K] .command.run
│ ├── [ 46] .command.sh
│ ├── [3.6K] .command.stub
│ ├── [ 188] .command.trace
│ └── [ 1] .exitcode
└── [4.0K] d4
└── [4.0K] 704d48467700dc73430015065888f4
├── [699K] ACBarrie_R1.paired_fastqc.html
├── [465K] ACBarrie_R1.paired_fastqc.zip
├── [ 113] ACBarrie_R1.paired.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/00/aac6bd3585c6c543a1a4b844669445/ACBarrie_R1.paired.fastq.gz
├── [707K] ACBarrie_R2.paired_fastqc.html
├── [468K] ACBarrie_R2.paired_fastqc.zip
├── [ 113] ACBarrie_R2.paired.fastq.gz -> /home/rad/repos/nextflow-embl-abr-webinar/docs/work/00/aac6bd3585c6c543a1a4b844669445/ACBarrie_R2.paired.fastq.gz
├── [ 0] .command.begin
├── [ 0] .command.err
├── [ 0] .command.log
├── [ 0] .command.out
├── [2.7K] .command.run
├── [ 45] .command.sh
├── [3.6K] .command.stub
├── [ 187] .command.trace
└── [ 1] .exitcode
25 directories, 153 files
nextflow.config
$HOME/.nextflow/config
includeConfig
includeConfig 'conf/publish.config'
-c additional.config
-C custom.config
env, params, process, docker…
nextflow.config et al.
params {
take = 1 //can be overwritten at run-time e.g. --take 2 to just process first two accessions or --take all to process all
accessions = [
"ACBarrie",
"Alsen",
"Baxter",
"Chara",
"Drysdale",
"Excalibur",
"Gladius",
"H45",
"Kukri",
"Pastor",
"RAC875",
"Volcanii",
"Westonia",
"Wyalkatchem",
"Xiaoyan",
"Yitpi"
]
adapters = "https://raw.githubusercontent.com/timflutre/trimmomatic/master/adapters/TruSeq3-PE.fa"
ref {
base_url = "https://urgi.versailles.inra.fr/download/iwgsc/IWGSC_RefSeq_Assemblies/v1.0/iwgsc_refseqv1.0_"
chr = "chr4A"
start = "688055092"
end = "688113092"
}
bam {
base_url = "http://crobiad.agwine.adelaide.edu.au/dawn/jbrowse-prod/data/local/by_chr/mapped_reads_merged/161010_Chinese_Spring_v1.0_pseudomolecules_parts.fasta.gz/minimap2_defaults/whole_genome/PE/BPA"
chr = "chr4A_part2"
start = "235500000"
end = "235558000"
}
outdir = "./results" //can be overwritten at run-time e.g. --outdir dirname
infodir = "./flowinfo" //can be overwritten at run-time e.g. --infodir dirname
}
//Default for all processes: use the 'lenient' caching mode (see the Nextflow `cache` directive)
process.cache = 'lenient'
//Configuration profiles, selected (and combined) at run time with -profile name1,name2,...
//The first group defines how the software is provided, the second where tasks are executed.
profiles {
    //SOFTWARE
    conda {
        process {
            conda = "$baseDir/conf/conda.yaml"
        }
    }
    condamodule {
        process.module = 'miniconda3/4.3.24'
    }
    docker {
        process.container = 'rsuchecki/nextflow-embl-abr-webinar'
        docker {
            enabled = true
            fixOwnership = true
        }
    }
    singularity {
        singularity {
            enabled = true
            autoMounts = true
            cacheDir = "singularity-images" //when distributing the pipeline probably should point under $workDir
        }
        process {
            container = 'shub://csiro-crop-informatics/nextflow-embl-abr-webinar' //Singularity hub
            // container = 'rsuchecki/nextflow-embl-abr-webinar' //pulled from Docker hub - would suffice but Singularity container is re-built from docker image so not ideal for reproducibility
            scratch = true //This is a hack - currently required as a workaround for https://github.com/sylabs/singularity/issues/1469#issuecomment-469129088
        }
    }
    singularitymodule {
        process.module = 'singularity/3.0.3' //Specific to our cluster - update as required
    }
    //EXECUTORS
    awsbatch {
        aws.region = 'ap-southeast-2'
        process {
            executor = 'awsbatch'
            queue = 'flowq'
            //Already inside the process scope: a `process.` prefix here would set process.process.container
            container = 'rsuchecki/nextflow-embl-abr-webinar'
        }
        executor {
            awscli = '/home/ec2-user/miniconda/bin/aws'
        }
    }
    slurm {
        process {
            executor = 'slurm'
            scratch = true
        }
    }
}
//PUBLISH RESULTS
//params.publishmode is referenced as the publishDir mode in conf/publish.config, included below
params.publishmode = "copy"
includeConfig 'conf/publish.config'
//GENERATE REPORT https://www.nextflow.io/docs/latest/tracing.html#execution-report
//Written to the directory given by params.infodir
report.enabled = true
report.file = "${params.infodir}/report.html"
//GENERATE TIMELINE https://www.nextflow.io/docs/latest/tracing.html#timeline-report
//Written to the directory given by params.infodir
timeline {
    enabled = true
    //Already inside the timeline scope: `timeline.file` here would set timeline.timeline.file
    //and the output location would not take effect
    file = "${params.infodir}/timeline.html"
}
//GENERATE PIPELINE TRACE https://www.nextflow.io/docs/latest/tracing.html#trace-report
//Written to the directory given by params.infodir
trace.enabled = true
trace.file = "${params.infodir}/trace.txt"
//GENERATE GRAPH REPRESENTATION OF THE PIPELINE FLOW
//Written in DOT format to the directory given by params.infodir
dag.enabled = true
dag.file = "${params.infodir}/flowchart.dot"
// dag.file = "${params.infodir}/dag.png"
#Local/server
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile conda
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile docker
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile singularity
#HPC
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile slurm,conda,condamodule
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile slurm,singularity,singularitymodule
#Cloud
nextflow run csiro-crop-informatics/nextflow-embl-abr-webinar -profile awsbatch \
-work-dir s3://your_s3_bucket/work --outdir s3://your_s3_bucket/results
Example workflow: global Conda env -> Docker -> Singularity
Conda fail: the infinite “solving environment” wheel of death.
— Nils Homer (@nilshomer) February 7, 2019
I just downloaded the software and built from source. It was much faster.#conda #bioconda #wheelofdeath
# Conda environment with the pinned tool versions used by the example workflow
name: tutorial
channels:
  - bioconda
  - conda-forge
  # conda's built-in channel collection is called `defaults` (plural);
  # `default` would be looked up as an ordinary channel of that name
  - defaults
dependencies:
  - fastqc=0.11.8
  - multiqc=1.7
  - trimmomatic=0.36
  - pigz=2.3.4
  - bwa=0.7.17
  - samtools=1.9
  - htslib=1.9
  - unzip=6.0
  - tabix=0.2.6
  - gnu-wget=1.18
nextflow.config
profiles {
conda {
process {
conda = "$baseDir/conf/conda.yaml"
}
}
}
# Image providing the workflow's tools through the conda environment defined in conf/conda.yaml
FROM rsuchecki/miniconda3:4.5.12

# UTF-8 locale
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

LABEL maintainer="Rad Suchecki <rad.suchecki@csiro.au>"

# Run subsequent RUN instructions with bash
SHELL ["/bin/bash", "-c"]

# Create the environment from the copied definition, then clean conda's caches
COPY conf/conda.yaml /
RUN conda env create -f /conda.yaml \
 && conda clean -a

# Put the binaries of the `tutorial` environment first on the PATH
ENV PATH="/opt/conda/envs/tutorial/bin:${PATH}"
nextflow.config
docker {
process.container = 'rsuchecki/nextflow-embl-abr-webinar'
docker {
enabled = true
fixOwnership = true
}
}
-profile docker
Bootstrap:docker
From:rsuchecki/nextflow-embl-abr-webinar:latest
nextflow.config
singularity {
singularity {
enabled = true
autoMounts = true
}
process {
container = 'shub://csiro-crop-informatics/nextflow-embl-abr-webinar' //Singularity hub
scratch = true //This is a hack - currently required as a workaround for https://github.com/sylabs/singularity/issues/1469#issuecomment-469129088
}
}
-profile singularity
publishDir directive
//Per-process publishing rules: selected outputs are published to sub-directories
//of params.outdir using the mode given by params.publishmode.
process {
    //MultiQC report for the raw reads
    withName: 'multiqc_raw' {
        publishDir {
            path = "${params.outdir}/qc_raw"
            mode = "${params.publishmode}"
        }
    }
    //MultiQC report for the trimmed reads
    withName: 'multiqc_trimmed' {
        publishDir {
            path = "${params.outdir}/qc_trimmed"
            mode = "${params.publishmode}"
        }
    }
    //Alignments
    withName: 'bwa_mem' {
        publishDir {
            path = "${params.outdir}/bwa"
            mode = "${params.publishmode}"
        }
    }
    //Currently not applied, add:
    //label 'stats'
    //at the top of a process definition to store declared outputs as follows
    withLabel: 'stats' {
        publishDir {
            path = "${params.outdir}/stats"
            mode = "${params.publishmode}"
        }
    }
}
results
├── bwa
│ └── ACBarrie.bam
├── downloaded
│ └── iwgsc_refseqv1.0_chr4A.fsa.zip
├── flowinfo
│ ├── report.html
│ ├── timeline.html
│ ├── trace.txt
│ └── trace.txt.1
├── qc_raw
│ ├── multiqc_data
│ └── multiqc_report.html
└── qc_trimmed
├── multiqc_data
└── multiqc_report.html
7 directories, 8 files